Import data

This data set gives annual statistics by age group, race, ethnicity, and gender on the number of people who have benefited from homelessness assistance services.

The source of this information is the Homelessness Data Integration System (HDIS), an extensive data repository that compiles and analyzes information from each of California’s 44 Continuums of Care (CoCs). Each CoC collects and reports information on the people it serves through a range of programs, such as those aimed at ending homelessness, offering outreach services to people experiencing homelessness, assisting with permanent housing solutions, and other projects in line with California’s Housing First objectives.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the four raw demographic tables (age, ethnicity, gender, race).
age_demo <- read.csv("./original data/demo/age_demo.csv")
ethnicity_demo <- read.csv("./original data/demo/ethnicity_demo.csv")
gender_demo <- read.csv("./original data/demo/gender_demo.csv")
race_demo <- read.csv("./original data/demo/race_demo.csv")
# Preview the first rows of the age table.
head(age_demo)
##   CALENDAR_YEAR LOCATION_ID   LOCATION AGE_GROUP_PUBLIC
## 1          2017         All California            18-24
## 2          2017         All California            25-34
## 3          2017         All California            35-44
## 4          2017         All California            45-54
## 5          2017         All California            55-64
## 6          2017         All California              65+
##   EXPERIENCING_HOMELESSNESS
## 1                     15984
## 2                     28654
## 3                     25831
## 4                     27651
## 5                     23396
## 6                      7111
# Preview the first six rows of the ethnicity table.
head(ethnicity_demo, n = 6L)
##   CALENDAR_YEAR LOCATION_ID               LOCATION           ETHNICITY
## 1          2017         All             California     Hispanic/Latinx
## 2          2017         All             California Not Hispanic/Latinx
## 3          2017         All             California             Unknown
## 4          2017      CA-500 Santa Clara County CoC     Hispanic/Latinx
## 5          2017      CA-500 Santa Clara County CoC Not Hispanic/Latinx
## 6          2017      CA-500 Santa Clara County CoC             Unknown
##   EXPERIENCING_HOMELESSNESS
## 1                     60962
## 2                    119153
## 3                      3791
## 4                      5034
## 5                      5074
## 6                        57
# Preview the first six rows of the gender table.
head(gender_demo, n = 6L)
##   CALENDAR_YEAR LOCATION_ID   LOCATION              GENDER
## 1          2017         All California              Female
## 2          2017         All California                Male
## 3          2017         All California Non-Singular Gender
## 4          2017         All California  Questioning Gender
## 5          2017         All California         Transgender
## 6          2017         All California             Unknown
##   EXPERIENCING_HOMELESSNESS
## 1                     79670
## 2                    101901
## 3                       148
## 4                         *
## 5                       676
## 6                      1505
# Preview the first six rows of the race table.
head(race_demo, n = 6L)
##   CALENDAR_YEAR LOCATION_ID   LOCATION
## 1          2017         All California
## 2          2017         All California
## 3          2017         All California
## 4          2017         All California
## 5          2017         All California
## 6          2017         All California
##                                            RACE EXPERIENCING_HOMELESSNESS
## 1 American Indian, Alaska Native, or Indigenous                      5638
## 2                       Asian or Asian American                      3005
## 3           Black, African American, or African                     57665
## 4                                Multiple Races                      9048
## 5           Native Hawaiian or Pacific Islander                      2555
## 6                                       Unknown                      6391

Clean data

library(readr)
# Folder that holds the raw demographic CSV files.
folder_path <- "./original data/demo/"
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names that really end in ".csv" are listed (".csv" alone would also
# match e.g. "xcsv.txt" or "a.csv.bak").
csv_files <- list.files(folder_path, pattern = "\\.csv$")

# Clean one raw demographic CSV and write the cleaned copy to disk.
#
# Arguments:
#   input_file  - path of the raw CSV to read.
#   output_file - path the cleaned CSV is written to.
#
# Steps:
#   * drop rows whose EXPERIENCING_HOMELESSNESS value is the "*" placeholder
#     (presumably a suppressed count - confirm against the data dictionary);
#   * convert the remaining counts to numbers, since the column is read as
#     text whenever a "*" is present;
#   * drop LOCATION_ID and normalise column names with janitor::clean_names().
#
# Returns the cleaned data frame invisibly, so callers can collect the results
# without the value being printed.
clean_csv <- function(input_file, output_file) {
  raw <- read.csv(input_file)
  data_cleaned <- raw |>
    filter(EXPERIENCING_HOMELESSNESS != "*") |>
    mutate(
      EXPERIENCING_HOMELESSNESS = as.numeric(EXPERIENCING_HOMELESSNESS)
    ) |>
    select(-LOCATION_ID) |>
    janitor::clean_names()
  write.csv(data_cleaned, file = output_file, row.names = FALSE)
  invisible(data_cleaned)
}

# Folder that receives the cleaned CSV files.
output_folder <- "./processed data/demo/"

# write.csv() does not create missing directories, so make sure the output
# folder exists before anything is written (no-op when it already does).
dir.create(output_folder, recursive = TRUE, showWarnings = FALSE)

# Clean every raw CSV and save it as "cleaned_<original name>" in the output
# folder; the list collects whatever clean_csv() returns for each file.
cleaned_data_list <- lapply(csv_files, function(file) {
  clean_csv(
    input_file = paste0(folder_path, file),
    output_file = paste0(output_folder, "cleaned_", file)
  )
})

# Import the cleaned tables written by the cleaning step.
age_clean <- read.csv("./processed data/demo/cleaned_age_demo.csv")
ethnicity_clean <- read.csv("./processed data/demo/cleaned_ethnicity_demo.csv")
gender_clean <- read.csv("./processed data/demo/cleaned_gender_demo.csv")
race_clean <- read.csv("./processed data/demo/cleaned_race_demo.csv")

Data visualization

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Yearly totals per age group, taken from the statewide "California" rows only.
# The raw files list a statewide row next to per-CoC rows (visible in the
# ethnicity preview; the age file is assumed to follow the same layout -
# confirm), so summing over every location would count people twice.
age_plot <- age_clean |>
  filter(location == "California") |>
  group_by(calendar_year, age_group_public) |>
  summarise(
    total_homelessness = sum(experiencing_homelessness),
    # Drop the leftover grouping by year: plotly splits line traces by dplyr
    # groups, which would break each line into one segment per year.
    .groups = "drop"
  )

# Interactive line chart: one line per age group; the hover text shows the
# year and the age group.
age_plot |>
  mutate(
    text_label = str_c(
      "Year: ", calendar_year, "\nAge Group: ", age_group_public
    )
  ) |>
  plot_ly(
    x = ~calendar_year,
    y = ~total_homelessness,
    type = "scatter",
    mode = "lines", # valid scatter modes are "lines", "markers", "text"
    color = ~age_group_public,
    colors = "viridis",
    text = ~text_label,
    alpha = 0.8
  ) |>
  layout(title = "Total Homelessness by Year and Age Group")
# Yearly totals per ethnicity, taken from the statewide "California" rows only.
# The raw file lists a statewide row next to per-CoC rows (e.g. Santa Clara
# County CoC), so summing over every location would count people twice.
ethnicity_plot <- ethnicity_clean |>
  filter(location == "California") |>
  group_by(calendar_year, ethnicity) |>
  summarise(
    total_homelessness = sum(experiencing_homelessness),
    .groups = "drop"
  )

# Interactive bar chart: one bar per ethnicity and year; the label carries the
# year and the ethnicity.
ethnicity_plot |>
  mutate(
    text_label = str_c("Year: ", calendar_year, "\nEthnicity: ", ethnicity)
  ) |>
  plot_ly(
    x = ~calendar_year,
    y = ~total_homelessness,
    type = "bar",
    color = ~ethnicity,
    colors = "viridis",
    text = ~text_label,
    alpha = 0.8
  ) |>
  layout(title = "Total Homelessness by Year and ethnicity")
# Yearly totals per gender, taken from the statewide "California" rows only.
# The raw files list a statewide row next to per-CoC rows (visible in the
# ethnicity preview; the gender file is assumed to follow the same layout -
# confirm), so summing over every location would count people twice.
gender_plot <- gender_clean |>
  filter(location == "California") |>
  group_by(calendar_year, gender) |>
  summarise(
    total_homelessness = sum(experiencing_homelessness),
    .groups = "drop"
  )

# Interactive bar chart: one bar per gender and year; the label carries the
# year and the gender.
gender_plot |>
  mutate(text_label = str_c("Year: ", calendar_year, "\nGender: ", gender)) |>
  plot_ly(
    x = ~calendar_year,
    y = ~total_homelessness,
    type = "bar",
    color = ~gender,
    colors = "viridis",
    text = ~text_label,
    alpha = 0.8
  ) |>
  layout(title = "Total Homelessness by Year and gender")
# Yearly totals per race, taken from the statewide "California" rows only.
# The raw files list a statewide row next to per-CoC rows (visible in the
# ethnicity preview; the race file is assumed to follow the same layout -
# confirm), so summing over every location would count people twice.
race_plot <- race_clean |>
  filter(location == "California") |>
  group_by(calendar_year, race) |>
  summarise(
    total_homelessness = sum(experiencing_homelessness),
    # Drop the leftover grouping by year: plotly splits line traces by dplyr
    # groups, which would break each line into one segment per year.
    .groups = "drop"
  )

# Interactive line chart: one line per race; the hover text shows the year and
# the race. The legend is placed to the right of the plot, vertically centred.
race_plot |>
  mutate(text_label = str_c("Year: ", calendar_year, "\nRace: ", race)) |>
  plot_ly(
    x = ~calendar_year,
    y = ~total_homelessness,
    type = "scatter",
    mode = "lines", # valid scatter modes are "lines", "markers", "text"
    color = ~race,
    colors = "viridis",
    text = ~text_label,
    alpha = 0.8
  ) |>
  layout(
    # The title previously said "gender" (copied from the gender chart).
    title = "Total Homelessness by Year and Race",
    legend = list(
      x = 1,
      y = 0.5,
      traceorder = "normal",
      bgcolor = "white",
      bordercolor = "white",
      borderwidth = 0.5
    )
  )
# Donut chart of each race's share of the total, summed over all years.
# Only the statewide "California" rows are used so that the shares are not
# computed from statewide and per-CoC counts mixed together (the race file is
# assumed to contain both, like the ethnicity file - confirm).
race_clean |>
  filter(location == "California") |>
  group_by(race) |>
  summarise(total_homelessness = sum(experiencing_homelessness)) |>
  mutate(percentage = total_homelessness / sum(total_homelessness)) |>
  plot_ly(labels = ~race, values = ~percentage, type = "pie", hole = 0.4) |>
  layout(title = "Percentage of Total Homeless by Race")